Goal: Dan suggested skipping the PCA step and just looking for metabolites associated with leaf length via penalized regression.
library(glmnet)
library(relaimpo)
library(tidyverse)
library(broom)
get leaflength data
# Read the leaf-length measurements, build a sampleID key ("wyo_<genotype>_<pot>"
# with zero-padded pot numbers) and keep only the columns used downstream.
leaflength <- read_csv("../../plant/output/leaf_lengths_metabolite.csv") %>%
  mutate(pot = str_pad(pot, width = 3, pad = "0")) %>%
  mutate(sampleID = str_c("wyo", genotype, pot, sep = "_")) %>%
  select(sampleID, genotype, trt, leaf_avg_std)
── Column specification ──────────────────────────────────────────────────────────────────────────
cols(
pot = col_double(),
soil = col_character(),
genotype = col_character(),
trt = col_character(),
leaf_avg = col_double(),
leaf_avg_std = col_double()
)
leaflength %>% arrange(sampleID)
get and wrangle metabolite data
met_raw <-read_csv("../input/metabolites_set1.csv")
── Column specification ──────────────────────────────────────────────────────────────────────────
cols(
.default = col_double(),
tissue = col_character(),
soil = col_character(),
genotype = col_character(),
autoclave = col_character(),
time_point = col_character(),
concatenate = col_character()
)
ℹ Use `spec()` for the full column specifications.
# Wrangle metabolites: build sampleID key, go long, clean, normalize, scale,
# then pivot wide to one row per sample with met_per_mg_* and met_amt_* columns.
met <- met_raw %>%
mutate(pot=str_pad(pot, width = 3, pad = "0")) %>%
mutate(sampleID=str_c("wyo", genotype, pot, sep="_")) %>%
select(sampleID, genotype, tissue, sample_mass = `sample_mass mg`, !submission_number:concatenate) %>%
pivot_longer(!sampleID:sample_mass, names_to = "metabolite", values_to = "met_amount") %>%
# remove unnamed metabolites (keep only names that contain at least one letter)
filter(str_detect(metabolite, "[A-Z]|[a-z]")) %>%
# adjust by sample mass (amount per mg of tissue)
mutate(met_per_mg=met_amount/sample_mass) %>%
# scale and center within each metabolite x genotype x tissue group
# NOTE(review): scale() returns a 1-column matrix, not a plain vector —
# pivot_wider appears to tolerate it here, but confirm column types downstream
group_by(metabolite, genotype, tissue) %>%
mutate(met_per_mg=scale(met_per_mg),
met_amt=scale(met_amount)
) %>%
# wide format: columns named <value>_<tissue>_<metabolite>
pivot_wider(id_cols = sampleID,
names_from = c(tissue, metabolite),
values_from = starts_with("met_"),
names_sep = "_")
met
split this into two data frames, one normalized by tissue amount and one not.
# Split the wide table into two predictor matrices (rownames = sampleID):
# one normalized by sample mass, one using the raw amounts.
met_per_mg <- met %>%
  select(sampleID, starts_with("met_per_mg")) %>%
  as.data.frame() %>%  # tibbles cannot carry rownames
  column_to_rownames("sampleID")
met_amt <- met %>%
  select(sampleID, starts_with("met_amt")) %>%
  as.data.frame() %>%
  column_to_rownames("sampleID")
get leaf data order to match
# Reorder leaflength rows so they match the row order of the metabolite data.
# NOTE(review): match() would insert NA rows if any met$sampleID were absent
# from leaflength — this assumes a perfect 1:1 overlap; confirm.
leaflength <- leaflength[match(met$sampleID, leaflength$sampleID),]
leaflength
Live vs blank dead
normalized
# Per-metabolite linear models on the mass-normalized data:
#   lm_trt:  metabolite value ~ treatment (live vs blank/dead)
#   lm_leaf: standardized leaf length ~ metabolite value
met_per_mg_lm <- met_per_mg %>%
  rownames_to_column("sampleID") %>%
  left_join(leaflength, by = "sampleID") %>%  # explicit key instead of silent join-key inference
  mutate(trt = ifelse(str_detect(trt, "live"), "live", "blank_dead")) %>%
  pivot_longer(cols = starts_with("met"), names_to = "metabolite") %>%
  group_by(metabolite) %>%
  nest() %>%
  mutate(lm_trt = map(data, ~ lm(value ~ trt, data = .)),
         lm_leaf = map(data, ~ lm(leaf_avg_std ~ value, data = .)))

# Tidy the fits into met_per_mg_lm_results. This object is referenced later
# (when annotating the elastic-net coefficients) but was never created in the
# original script. Intercept rows are dropped BEFORE the FDR adjustment, and
# the frame is ungrouped so p.adjust pools the slope p-values across ALL
# metabolites rather than within each metabolite.
met_per_mg_lm_results <- met_per_mg_lm %>%
  mutate(broomtidy.trt = map(lm_trt, broom::tidy),
         broomtidy.leaf = map(lm_leaf, broom::tidy)) %>%
  unnest(c(broomtidy.trt, broomtidy.leaf), names_sep = ".") %>%
  ungroup() %>%
  select(metabolite,
         term.trt = broomtidy.trt.term, p.value.trt = broomtidy.trt.p.value,
         term.leaf = broomtidy.leaf.term, p.value.leaf = broomtidy.leaf.p.value) %>%
  filter(! str_detect(term.trt, "Intercept"),
         ! str_detect(term.leaf, "Intercept")) %>%
  mutate(FDR.trt = p.adjust(p.value.trt, method = "fdr"),
         FDR.leaf = p.adjust(p.value.leaf, method = "fdr"))
Joining, by = "sampleID"
raw
# Per-metabolite linear models on the raw (non-mass-normalized) data:
#   lm_trt:  metabolite value ~ treatment (live vs blank/dead)
#   lm_leaf: standardized leaf length ~ metabolite value
met_amt_lm <- met_amt %>%
  rownames_to_column("sampleID") %>%
  left_join(leaflength, by = "sampleID") %>%  # explicit key instead of silent join-key inference
  mutate(trt = ifelse(str_detect(trt, "live"), "live", "blank_dead")) %>%
  pivot_longer(cols = starts_with("met"), names_to = "metabolite") %>%
  group_by(metabolite) %>%
  nest() %>%
  mutate(lm_trt = map(data, ~ lm(value ~ trt, data = .)),
         lm_leaf = map(data, ~ lm(leaf_avg_std ~ value, data = .)))
Joining, by = "sampleID"
# Tidy the per-metabolite fits and FDR-adjust the slope p-values.
# Two fixes vs the original:
#  1. The deprecated unnest(a, b) interface (which produced the repaired names
#     term1/p.value1) is replaced by unnest(c(...), names_sep = ".").
#  2. The nested tibble was still grouped by metabolite, so p.adjust() ran
#     within each metabolite's two rows (intercept + slope) instead of across
#     all metabolites. We now drop intercept rows first, ungroup, and adjust
#     the slope p-values across the full set of metabolites.
met_amt_lm_results <- met_amt_lm %>%
  mutate(broomtidy.trt = map(lm_trt, broom::tidy),
         broomtidy.leaf = map(lm_leaf, broom::tidy)) %>%
  unnest(c(broomtidy.trt, broomtidy.leaf), names_sep = ".") %>%
  ungroup() %>%
  select(metabolite,
         term.trt = broomtidy.trt.term, p.value.trt = broomtidy.trt.p.value,
         term.leaf = broomtidy.leaf.term, p.value.leaf = broomtidy.leaf.p.value) %>%
  filter(! str_detect(term.trt, "Intercept"),
         ! str_detect(term.leaf, "Intercept")) %>%
  mutate(FDR.trt = p.adjust(p.value.trt, method = "fdr"),
         FDR.leaf = p.adjust(p.value.leaf, method = "fdr"))
unnest() has a new interface. See ?unnest for details.
Try `df %>% unnest(c(broomtidy.trt, broomtidy.leaf))`, with `mutate()` if needed
# Count metabolites significant at FDR < 0.1 for treatment, leaf length, and both.
met_amt_lm_results %>%
  ungroup() %>%
  summarize(sig.trt = sum(FDR.trt < 0.1),
            sig.leaf = sum(FDR.leaf < 0.1),
            sig.both = sum(FDR.trt < 0.1 & FDR.leaf < 0.1),
            count = n())
Fit repeated CVs (100 runs, drawn from 101 generated fold sets) for each of 11 alphas
# Build reusable fold assignments (36 samples shuffled into folds 1..6) so each
# alpha is cross-validated on identical folds within a run.
set.seed(1245)
folds <- tibble(run=1:101) %>%
mutate(folds=map(run, ~ sample(rep(1:6,6))))
# NOTE(review): 101 fold sets are generated but only runs 1:100 are used below —
# confirm whether 100 or 101 was intended.
# Fit cv.glmnet for every run x alpha combination (11 alphas spanning ridge to lasso).
system.time (met_per_mg_multiCV <- expand_grid(run=1:100, alpha=round(seq(0,1,.1),1)) %>%
left_join(folds, by="run") %>%
mutate(fit=map2(folds, alpha, ~ cv.glmnet(x=as.matrix(met_per_mg),
y=leaflength$leaf_avg_std,
foldid = .x, alpha=.y
)))
#, lambda=exp(seq(-5,0,length.out = 50)) )))
) #100 seconds
user system elapsed
183.676 12.864 227.036
head(met_per_mg_multiCV)
for each fit, pull out the mean cv error, lambda, min lambda, and 1se lambda
# For each CV fit, pull out the error curve, lambda path, and selected lambdas.
# purrr's map(.x, "name") plucks element "name" from each list item; the original
# magrittr::extract("cvm") calls only worked because they evaluated to the string.
met_per_mg_multiCV <- met_per_mg_multiCV %>%
  mutate(cvm = map(fit, "cvm"),                    # mean CV error per lambda
         lambda = map(fit, "lambda"),              # lambda sequence of each fit
         lambda.min = map_dbl(fit, "lambda.min"),  # lambda with minimum CV error
         lambda.1se = map_dbl(fit, "lambda.1se"),  # largest lambda within 1 SE of the min
         nzero = map(fit, "nzero"))                # non-zero coefficient count per lambda
head(met_per_mg_multiCV)
Now calculate the mean and SEM of cvm, and of the min and 1se lambdas. These need to be done separately because of the way the grouping works.
# Average the CV error curves over the runs for each (alpha, lambda) pair.
met_per_mg_summary_cvm <- met_per_mg_multiCV %>%
  dplyr::select(-fit, -folds) %>%
  unnest(c(cvm, lambda)) %>%
  group_by(alpha, lambda) %>%
  summarize(meancvm = mean(cvm),
            sem = sd(cvm) / sqrt(n()),
            high = meancvm + sem,
            low = meancvm - sem,
            .groups = "drop_last")  # same grouping as the default; silences the message
`summarise()` has grouped output by 'alpha'. You can override using the `.groups` argument.
met_per_mg_summary_cvm
# Summarize the selected lambdas per alpha across runs (mean, SD, mean+SD).
# Commented-out lines are alternative summaries the author experimented with.
met_per_mg_summary_lambda <- met_per_mg_multiCV %>% dplyr::select(-fit, -folds, -cvm) %>%
group_by(alpha) %>%
summarize(
lambda.min.sd=sd(lambda.min),
lambda.min.mean=mean(lambda.min),
#lambda.min.med=median(lambda.min),
lambda.min.high=lambda.min.mean+lambda.min.sd,
#lambda.min.low=lambda.min.mean-lambda.min.sem,
#lambda.1se.sem=sd(lambda.1se)/sqrt(n()),
lambda.1se.mean=mean(lambda.1se),
#lambda.1se.med=median(lambda.1se),
#lambda.1se.high=lambda.1se+lambda.1se.sem,
#lambda.1se.low=lambda.1se-lambda.1se.sem,
# keep run 1's nzero/lambda vectors — assumes the lambda path is identical
# across runs for a given alpha; TODO confirm
nzero=nzero[1],
lambda=lambda[1]
)
met_per_mg_summary_lambda
plot it
# CV error curves: mean cvm +/- SEM vs log(lambda), faceted by alpha.
# Vertical lines mark the mean lambda.min (gray) and mean + SD (blue).
met_per_mg_summary_cvm %>%
#filter(alpha!=0) %>% # worse than everything else and throwing the plots off
ggplot(aes(x=log(lambda), y= meancvm, ymin=low, ymax=high)) +
geom_ribbon(alpha=.25) +
geom_line(aes(color=as.character(alpha))) +
facet_wrap(~ as.character(alpha)) +
coord_cartesian(xlim=(c(-5,0))) +
geom_vline(aes(xintercept=log(lambda.min.mean)), alpha=.5, data=met_per_mg_summary_lambda) +
geom_vline(aes(xintercept=log(lambda.min.high)), alpha=.5, data=met_per_mg_summary_lambda, color="blue")
Make a plot of MSE at minimum lambda for each alpha
# Minimum mean CV error (+/- SEM) achieved by each alpha.
met_per_mg_summary_cvm %>%
group_by(alpha) %>%
# take the single lowest-cvm row per alpha ("first" breaks ties deterministically)
filter(rank(meancvm, ties.method = "first")==1) %>%
ggplot(aes(x=alpha,y=meancvm,ymin=low,ymax=high)) +
geom_ribbon(color=NA, fill="gray80") +
geom_line() +
geom_point()
Plot the number of nzero coefficients
# Number of non-zero coefficients at (approximately) the mean minimum lambda:
# for each alpha, pick the path lambda closest to lambda.min.mean.
met_per_mg_summary_lambda %>%
  unnest(c(lambda, nzero)) %>%
  group_by(alpha) %>%
  filter(abs(lambda.min.mean - lambda) == min(abs(lambda.min.mean - lambda))) %>%
  ungroup() %>%
  ggplot(aes(x = as.character(alpha), y = nzero)) +
  geom_point() +
  ggtitle("Number of non-zero coefficients at minimum lambda") +  # typo "coefficents" fixed
  ylim(0, 50)
OK let’s do repeated test train starting from these CV lambdas
# Repeated train/test evaluation of an elastic-net fit at fixed lambda and alpha.
#
# lambda, alpha: glmnet penalty size and mixing parameter.
# n:            number of random train/test splits.
# sample_size:  total number of samples; defaults to nrow(x) rather than the
#               previous hard-coded 36 (identical for this data set, but
#               generalizes to other predictor matrices).
# train_size:   number of samples used for training in each split.
# x, y:         predictor matrix and response vector.
#
# Returns a one-row tibble: count of NA test correlations, count of
# non-positive test correlations, and the mean test correlation and MSE.
multi_tt <- function(lambda, alpha, n=10000, sample_size=nrow(x), train_size=30, x, y=leaflength$leaf_avg_std) {
  # progress markers, since this runs n fits per call
  print(lambda)
  print(alpha)
  tt <-
    tibble(run=1:n) %>%
    mutate(train=map(run, ~ sample(1:sample_size, train_size))) %>%
    mutate(fit=map(train, ~ glmnet(x=x[.,], y=y[.], lambda = lambda, alpha = alpha ))) %>%
    # predict on the held-out rows of each split
    mutate(pred=map2(fit, train, ~ predict(.x, newx = x[-.y,]))) %>%
    mutate(cor=map2_dbl(pred, train, ~ cor(.x, y[-.y]) )) %>%
    mutate(MSE=map2_dbl(pred, train, ~ mean((y[-.y] - .x)^2))) %>%
    summarize(
      num_na=sum(is.na(cor)),
      num_lt_0=sum(cor<=0, na.rm=TRUE),  # note: also counts cor exactly 0
      avg_cor=mean(cor, na.rm=TRUE),
      avg_MSE=mean(MSE))
  tt
}
# Pair each alpha's mean minimum lambda with one representative cv.glmnet fit
# (run 1), so we can predict from its fitted lambda path below.
per_mg_fit_test_train <- met_per_mg_summary_lambda %>%
  select(alpha, lambda.min.mean)
per_mg_fit_test_train <- met_per_mg_multiCV %>%
  filter(run == 1) %>%
  select(alpha, fit) %>%
  right_join(per_mg_fit_test_train, by = "alpha")  # explicit key; silences join message
Joining, by = "alpha"
# Full-data predictions at each alpha's mean minimum lambda, plus repeated
# train/test evaluation via multi_tt (slow: 10000 fits per alpha).
per_mg_fit_test_train <- per_mg_fit_test_train %>%
# NOTE(review): s = lambda.min.mean is generally not on the fitted lambda path;
# predict() presumably interpolates between path values — verify against glmnet docs
mutate(pred_full=map2(fit, lambda.min.mean, ~ predict(.x, s=.y, newx=as.matrix(met_per_mg))),
full_R=map_dbl(pred_full, ~ cor(.x, leaflength$leaf_avg_std)),
full_MSE=map_dbl(pred_full, ~ mean((leaflength$leaf_avg_std-.x)^2))) %>%
mutate(tt=map2(lambda.min.mean, alpha, ~ multi_tt(lambda=.x, alpha=.y, x=as.matrix(met_per_mg))))
[1] 23.42921
[1] 0
[1] 2.153769
[1] 0.1
[1] 1.359696
[1] 0.2
[1] 1.011106
[1] 0.3
[1] 0.8373424
[1] 0.4
[1] 0.7287725
[1] 0.5
[1] 0.6419847
[1] 0.6
[1] 0.5728218
[1] 0.7
the standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zero
[1] 0.5127878
[1] 0.8
the standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zero
[1] 0.461043
[1] 0.9
the standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zero
[1] 0.4188835
[1] 1
the standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zerothe standard deviation is zero
(per_mg_fit_test_train <- per_mg_fit_test_train %>% unnest(tt))
# Average test-set correlation (red) and MSE (blue) as a function of alpha.
per_mg_fit_test_train %>%
ggplot(aes(x=alpha)) +
geom_line(aes(y=avg_cor), color="red") +
geom_point(aes(y=avg_cor), color="red") +
geom_line(aes(y=avg_MSE), color="blue") +
geom_point(aes(y=avg_MSE), color="blue")
# Chosen mixing parameter based on the test/train comparison above.
alpha_per_mg <- .1
best_per_mg <- per_mg_fit_test_train %>% filter(alpha == alpha_per_mg)
best_per_mg_fit <- best_per_mg$fit[[1]]
best_per_mg_lambda <- best_per_mg$lambda.min.mean
# Coefficients at the chosen lambda, converted to a plain data frame.
per_mg_coef.tb <- coef(best_per_mg_fit, s=best_per_mg_lambda) %>%
as.matrix() %>% as.data.frame() %>%
rownames_to_column(var="metabolite") %>%
# NOTE(review): the coefficient column name ("1" here) is glmnet-version
# dependent (newer versions emit "s1") — confirm before rerunning
rename(beta=`1`)
# Non-zero coefficients, largest absolute effect first.
per_mg_coef.tb %>% filter(beta!=0) %>% arrange(desc(abs(beta)))
NA
pred and obs
# Observed vs predicted leaf length on the full data.
plot(leaflength$leaf_avg_std, best_per_mg$pred_full[[1]])
cor.test(leaflength$leaf_avg_std, best_per_mg$pred_full[[1]]) # r = 0.748 per the recorded output (old ".57" note was stale)
Pearson's product-moment correlation
data: leaflength$leaf_avg_std and best_per_mg$pred_full[[1]]
t = 6.5696, df = 34, p-value = 1.583e-07
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.5559761 0.8641139
sample estimates:
cor
0.7479016
best_per_mg$full_MSE
[1] 0.617451
# Annotate the non-zero elastic-net coefficients with the per-metabolite lm results.
# NOTE(review): this requires met_per_mg_lm_results (tidied results of
# met_per_mg_lm, analogous to met_amt_lm_results) to exist — it is not created
# anywhere in this file as shown; confirm it is defined upstream.
per_mg_coef.tb_and_lm <- per_mg_coef.tb %>%
filter(beta !=0, str_detect(metabolite, "Intercept", negate = TRUE)) %>%
left_join(met_per_mg_lm_results) %>%
arrange(desc(abs(beta))) %>%
select(-term.trt, -term.leaf)
Joining, by = "metabolite"
# Save the annotated coefficient table and display it.
write_csv(per_mg_coef.tb_and_lm, "met_per_mg_elastic_leaf_lm_trt.csv")
per_mg_coef.tb_and_lm
Fit repeated CVs (100 runs, drawn from 101 generated fold sets) for each of 11 alphas
# Same CV procedure for the raw-amount matrix. Re-seeding with the same seed
# regenerates the identical fold assignments used for the per-mg analysis.
set.seed(1245)
folds <- tibble(run=1:101) %>%
mutate(folds=map(run, ~ sample(rep(1:6,6))))
# NOTE(review): 101 fold sets generated, only runs 1:100 used — confirm intent.
system.time (met_amt_multiCV <- expand_grid(run=1:100, alpha=round(seq(0,1,.1),1)) %>%
left_join(folds, by="run") %>%
mutate(fit=map2(folds, alpha, ~ cv.glmnet(x=as.matrix(met_amt), y=leaflength$leaf_avg_std, foldid = .x, alpha=.y
)))
#, lambda=exp(seq(-5,0,length.out = 50)) )))
) #100 seconds
user system elapsed
150.207 10.925 202.175
head(met_amt_multiCV)
for each fit, pull out the mean cv error, lambda, min lambda, and 1se lambda
# For each CV fit, pull out the error curve, lambda path, and selected lambdas.
# purrr's map(.x, "name") plucks element "name" from each list item; the original
# magrittr::extract("cvm") calls only worked because they evaluated to the string.
met_amt_multiCV <- met_amt_multiCV %>%
  mutate(cvm = map(fit, "cvm"),                    # mean CV error per lambda
         lambda = map(fit, "lambda"),              # lambda sequence of each fit
         lambda.min = map_dbl(fit, "lambda.min"),  # lambda with minimum CV error
         lambda.1se = map_dbl(fit, "lambda.1se"),  # largest lambda within 1 SE of the min
         nzero = map(fit, "nzero"))                # non-zero coefficient count per lambda
head(met_amt_multiCV)
Now calculate the mean and SEM of cvm, and of the min and 1se lambdas. These need to be done separately because of the way the grouping works.
# Average the CV error curves over the runs for each (alpha, lambda) pair.
met_amt_summary_cvm <- met_amt_multiCV %>%
  dplyr::select(-fit, -folds) %>%
  unnest(c(cvm, lambda)) %>%
  group_by(alpha, lambda) %>%
  summarize(meancvm = mean(cvm),
            sem = sd(cvm) / sqrt(n()),
            high = meancvm + sem,
            low = meancvm - sem,
            .groups = "drop_last")  # same grouping as the default; silences the message
`summarise()` has grouped output by 'alpha'. You can override using the `.groups` argument.
met_amt_summary_cvm
# Summarize the selected lambdas per alpha across runs (mean, SD, mean+SD).
# Commented-out lines are alternative summaries the author experimented with.
met_amt_summary_lambda <- met_amt_multiCV %>% dplyr::select(-fit, -folds, -cvm) %>%
group_by(alpha) %>%
summarize(
lambda.min.sd=sd(lambda.min),
lambda.min.mean=mean(lambda.min),
#lambda.min.med=median(lambda.min),
lambda.min.high=lambda.min.mean+lambda.min.sd,
#lambda.min.low=lambda.min.mean-lambda.min.sem,
#lambda.1se.sem=sd(lambda.1se)/sqrt(n()),
lambda.1se.mean=mean(lambda.1se),
#lambda.1se.med=median(lambda.1se),
#lambda.1se.high=lambda.1se+lambda.1se.sem,
#lambda.1se.low=lambda.1se-lambda.1se.sem,
# keep run 1's nzero/lambda vectors — assumes the lambda path is identical
# across runs for a given alpha; TODO confirm
nzero=nzero[1],
lambda=lambda[1]
)
met_amt_summary_lambda
plot it
# CV error curves: mean cvm +/- SEM vs log(lambda), faceted by alpha.
# Vertical lines mark the mean lambda.min (gray) and mean + SD (blue).
met_amt_summary_cvm %>%
#filter(alpha!=0) %>% # worse than everything else and throwing the plots off
ggplot(aes(x=log(lambda), y= meancvm, ymin=low, ymax=high)) +
geom_ribbon(alpha=.25) +
geom_line(aes(color=as.character(alpha))) +
facet_wrap(~ as.character(alpha)) +
coord_cartesian(xlim=(c(-5,0))) +
geom_vline(aes(xintercept=log(lambda.min.mean)), alpha=.5, data=met_amt_summary_lambda) +
geom_vline(aes(xintercept=log(lambda.min.high)), alpha=.5, data=met_amt_summary_lambda, color="blue")
Make a plot of MSE at minimum lambda for each alpha
# Minimum mean CV error (+/- SEM) achieved by each alpha.
met_amt_summary_cvm %>%
group_by(alpha) %>%
# take the single lowest-cvm row per alpha ("first" breaks ties deterministically)
filter(rank(meancvm, ties.method = "first")==1) %>%
ggplot(aes(x=alpha,y=meancvm,ymin=low,ymax=high)) +
geom_ribbon(color=NA, fill="gray80") +
geom_line() +
geom_point()
Plot the number of nzero coefficients
# Number of non-zero coefficients at (approximately) the mean minimum lambda:
# for each alpha, pick the path lambda closest to lambda.min.mean.
met_amt_summary_lambda %>%
  unnest(c(lambda, nzero)) %>%
  group_by(alpha) %>%
  filter(abs(lambda.min.mean - lambda) == min(abs(lambda.min.mean - lambda))) %>%
  ungroup() %>%
  ggplot(aes(x = as.character(alpha), y = nzero)) +
  geom_point() +
  ggtitle("Number of non-zero coefficients at minimum lambda") +  # typo "coefficents" fixed
  ylim(0, 50)
OK let’s do repeated test train starting from these CV lambdas
# Repeated train/test evaluation of an elastic-net fit at fixed lambda and alpha.
# NOTE(review): this is a byte-identical redefinition of multi_tt from earlier
# in the script; kept so each half of the analysis can be run independently.
#
# lambda, alpha: glmnet penalty size and mixing parameter.
# n:            number of random train/test splits.
# sample_size:  total number of samples; defaults to nrow(x) rather than the
#               previous hard-coded 36 (identical for this data set, but
#               generalizes to other predictor matrices).
# train_size:   number of samples used for training in each split.
# x, y:         predictor matrix and response vector.
#
# Returns a one-row tibble: count of NA test correlations, count of
# non-positive test correlations, and the mean test correlation and MSE.
multi_tt <- function(lambda, alpha, n=10000, sample_size=nrow(x), train_size=30, x, y=leaflength$leaf_avg_std) {
  # progress markers, since this runs n fits per call
  print(lambda)
  print(alpha)
  tt <-
    tibble(run=1:n) %>%
    mutate(train=map(run, ~ sample(1:sample_size, train_size))) %>%
    mutate(fit=map(train, ~ glmnet(x=x[.,], y=y[.], lambda = lambda, alpha = alpha ))) %>%
    # predict on the held-out rows of each split
    mutate(pred=map2(fit, train, ~ predict(.x, newx = x[-.y,]))) %>%
    mutate(cor=map2_dbl(pred, train, ~ cor(.x, y[-.y]) )) %>%
    mutate(MSE=map2_dbl(pred, train, ~ mean((y[-.y] - .x)^2))) %>%
    summarize(
      num_na=sum(is.na(cor)),
      num_lt_0=sum(cor<=0, na.rm=TRUE),  # note: also counts cor exactly 0
      avg_cor=mean(cor, na.rm=TRUE),
      avg_MSE=mean(MSE))
  tt
}
# Pair each alpha's mean minimum lambda with one representative cv.glmnet fit
# (run 1), so we can predict from its fitted lambda path below.
amt_fit_test_train <- met_amt_summary_lambda %>%
  select(alpha, lambda.min.mean)
amt_fit_test_train <- met_amt_multiCV %>%
  filter(run == 1) %>%
  select(alpha, fit) %>%
  right_join(amt_fit_test_train, by = "alpha")  # explicit key; silences join message
Joining, by = "alpha"
# Full-data predictions at each alpha's mean minimum lambda, plus repeated
# train/test evaluation via multi_tt (slow: 10000 fits per alpha).
amt_fit_test_train <- amt_fit_test_train %>%
# NOTE(review): s = lambda.min.mean is generally not on the fitted lambda path;
# predict() presumably interpolates between path values — verify against glmnet docs
mutate(pred_full=map2(fit, lambda.min.mean, ~ predict(.x, s=.y, newx=as.matrix(met_amt))),
full_R=map_dbl(pred_full, ~ cor(.x, leaflength$leaf_avg_std)),
full_MSE=map_dbl(pred_full, ~ mean((leaflength$leaf_avg_std-.x)^2))) %>%
mutate(tt=map2(lambda.min.mean, alpha, ~ multi_tt(lambda=.x, alpha=.y, x=as.matrix(met_amt))))
[1] 15.81867
[1] 0
[1] 1.628085
[1] 0.1
[1] 1.005012
[1] 0.2
[1] 0.7546315
[1] 0.3
[1] 0.602976
[1] 0.4
[1] 0.5073558
[1] 0.5
[1] 0.4406727
[1] 0.6
[1] 0.3961641
[1] 0.7
[1] 0.3522546
[1] 0.8
[1] 0.3196981
[1] 0.9
[1] 0.2933389
[1] 1
(amt_fit_test_train <- amt_fit_test_train %>% unnest(tt))
# Average test-set correlation (red) and MSE (blue) as a function of alpha.
amt_fit_test_train %>%
ggplot(aes(x=alpha)) +
geom_line(aes(y=avg_cor), color="red") +
geom_point(aes(y=avg_cor), color="red") +
geom_line(aes(y=avg_MSE), color="blue") +
geom_point(aes(y=avg_MSE), color="blue")
Not a ton of difference here. Ignoring 0.0, then 0.1 or 1 are the best — kind of strange! ## Look at the fit:
# Chosen mixing parameter based on the test/train comparison above.
alpha_amt <- .1
best_amt <- amt_fit_test_train %>% filter(alpha == alpha_amt)
best_amt_fit <- best_amt$fit[[1]]
best_amt_lambda <- best_amt$lambda.min.mean
# Coefficients at the chosen lambda, converted to a plain data frame.
amt_coef.tb <- coef(best_amt_fit, s=best_amt_lambda) %>%
as.matrix() %>% as.data.frame() %>%
rownames_to_column(var="metabolite") %>%
# NOTE(review): the coefficient column name ("1" here) is glmnet-version
# dependent (newer versions emit "s1") — confirm before rerunning
rename(beta=`1`)
# Non-zero coefficients, largest absolute effect first.
amt_coef.tb %>% filter(beta!=0) %>% arrange(desc(abs(beta)))
NA
pred and obs
# Observed vs predicted leaf length on the full data.
plot(leaflength$leaf_avg_std, best_amt$pred_full[[1]])
cor.test(leaflength$leaf_avg_std, best_amt$pred_full[[1]]) # r = 0.849 per the recorded output (old ".736" note was stale)
Pearson's product-moment correlation
data: leaflength$leaf_avg_std and best_amt$pred_full[[1]]
t = 9.3573, df = 34, p-value = 6.215e-11
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.7212855 0.9205609
sample estimates:
cor
0.8487052
best_amt$full_MSE
[1] 0.4243047
# Annotate the non-zero elastic-net coefficients with the per-metabolite lm
# results (FDR-adjusted treatment and leaf-length tests), joined by metabolite.
amt_coef.tb_and_lm <- amt_coef.tb %>%
filter(beta !=0, str_detect(metabolite, "Intercept", negate = TRUE)) %>%
left_join(met_amt_lm_results) %>%
arrange(desc(abs(beta))) %>%
select(-term.trt, -term.leaf)
Joining, by = "metabolite"
# Save the annotated coefficient table and display it.
write_csv(amt_coef.tb_and_lm, "met_amt_elastic_leaf_lm_trt.csv")
amt_coef.tb_and_lm